延續昨天,今天就來看看範例中是怎麼呼叫API及使用SDK吧!(打開index.html及token.php)
今天html及DOM的部分就先不理會
<!-- Speech SDK reference sdk. -->
<script src="microsoft.cognitiveservices.speech.sdk.bundle.js"></script>
首先引入了SDK中的js檔
接著這個function要在token.php中呼叫API以取得token
並在下面使用SDK的程式碼最後被呼叫
<!-- Speech SDK Authorization token -->
<script>
// Note: Replace the URL with a valid endpoint to retrieve
// authorization tokens for your subscription.
var authorizationEndpoint = "token.php";
function RequestAuthorizationToken() {
if (authorizationEndpoint) {
var a = new XMLHttpRequest();
a.open("GET", authorizationEndpoint);
a.setRequestHeader("Content-Type", "application/x-www-form-urlencoded");
a.send("");
a.onload = function() {
var token = JSON.parse(atob(this.responseText.split(".")[1]));
serviceRegion.value = token.region;
authorizationToken = this.responseText;
subscriptionKey.disabled = true;
subscriptionKey.value = "using authorization token (hit F5 to refresh)";
console.log("Got an authorization token: " + token);
}
}
}
</script>
authorizationEndpoint
這個變數要放的是token.php
的路徑
(如果是放在同一個資料夾的同一層就無需更改範例程式碼)
function RequestAuthorizationToken
中
透過XMLHttpRequest來GET token.php取得的token
放到authorizationToken
這個變數
並且其位置(region)放到serviceRegion.value
再來便要使用SDK了
// Speech SDK USAGE
// status fields and start button in UI
var phraseDiv;
var startRecognizeOnceAsyncButton;
// subscription key and region for speech services.
var subscriptionKey, serviceRegion;
var authorizationToken;
var SpeechSDK;
var recognizer;
document.addEventListener("DOMContentLoaded", function () {
startRecognizeOnceAsyncButton = document.getElementById("startRecognizeOnceAsyncButton");
subscriptionKey = document.getElementById("subscriptionKey");
serviceRegion = document.getElementById("serviceRegion");
phraseDiv = document.getElementById("phraseDiv");
startRecognizeOnceAsyncButton.addEventListener("click", function () {
startRecognizeOnceAsyncButton.disabled = true;
phraseDiv.innerHTML = "";
// if we got an authorization token, use the token. Otherwise use the provided subscription key
var speechConfig;
if (authorizationToken) {
speechConfig = SpeechSDK.SpeechConfig.fromAuthorizationToken(authorizationToken, serviceRegion.value);
} else {
if (subscriptionKey.value === "" || subscriptionKey.value === "subscription") {
alert("Please enter your Microsoft Cognitive Services Speech subscription key!");
return;
}
speechConfig = SpeechSDK.SpeechConfig.fromSubscription(subscriptionKey.value, serviceRegion.value);
}
speechConfig.speechRecognitionLanguage = "zh-TW";
var audioConfig = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);
recognizer.recognizeOnceAsync(
function (result) {
startRecognizeOnceAsyncButton.disabled = false;
phraseDiv.innerHTML += result.text;
window.console.log(result);
recognizer.close();
recognizer = undefined;
},
function (err) {
startRecognizeOnceAsyncButton.disabled = false;
phraseDiv.innerHTML += err;
window.console.log(err);
recognizer.close();
recognizer = undefined;
});
});
if (!!window.SpeechSDK) {
SpeechSDK = window.SpeechSDK;
startRecognizeOnceAsyncButton.disabled = false;
document.getElementById('content').style.display = 'block';
document.getElementById('warning').style.display = 'none';
// in case we have a function for getting an authorization token, call it.
if (typeof RequestAuthorizationToken === "function") {
RequestAuthorizationToken();
}
}
});
一開始宣告了幾個變數
分別用來放UI介面的DOM元素以及API所需的key, region(上面有用到)以及SDK的物件等
// status fields and start button in UI
var phraseDiv;
var startRecognizeOnceAsyncButton;
// subscription key and region for speech services.
var subscriptionKey, serviceRegion;
var authorizationToken;
var SpeechSDK;
var recognizer;
接著如果成功拿到授權的token
就利用SDK的fromAuthorizationToken()
放到speechConfig
中
// if we got an authorization token, use the token. Otherwise use the provided subscription key
var speechConfig;
if (authorizationToken) {
speechConfig = SpeechSDK.SpeechConfig.fromAuthorizationToken(authorizationToken, serviceRegion.value);
} else {
if (subscriptionKey.value === "" || subscriptionKey.value === "subscription") {
alert("Please enter your Microsoft Cognitive Services Speech subscription key!");
return;
}
speechConfig = SpeechSDK.SpeechConfig.fromSubscription(subscriptionKey.value, serviceRegion.value);
}
然後設定辨識的語言,預設為美國地區的英文
要改成中文的話可以改為"zh-TW"
其他則如https://docs.microsoft.com/zh-tw/azure/cognitive-services/speech-service/language-support
目前還沒找到怎麼混著不同語言的方法
speechConfig.speechRecognitionLanguage = "zh-TW";
然後音訊的config則來自SDK的fromDefaultMicrophoneInput()
也就是使用電腦預設的麥克風
var audioConfig = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
兩個config都設定完成,就可以來初始化辨識器了
recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);
範例中使用的是非同步的一次性辨識
在偵測到一段沉默時即停止辨識
在這段程式碼中,會將結果的文字放到網頁頁面的文字框中
recognizer.recognizeOnceAsync(
function (result) {
startRecognizeOnceAsyncButton.disabled = false;
phraseDiv.innerHTML += result.text;
window.console.log(result);
recognizer.close();
recognizer = undefined;
},
function (err) {
startRecognizeOnceAsyncButton.disabled = false;
phraseDiv.innerHTML += err;
window.console.log(err);
recognizer.close();
recognizer = undefined;
});